1 LOADING DATASETS

## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp0hmOPz/downloaded_packages
## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp0hmOPz/downloaded_packages
## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp0hmOPz/downloaded_packages
## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp0hmOPz/downloaded_packages

2 SIMPLIFYING DATA SET

DimPlot(object = SO7, reduction = "umap", group.by = "subclass_MD", label = TRUE)

markers.to.plot_MD <- c(
  # type_1
  "Pappa2",
  "Aard",
  # type_2
  "Egf",
  "Fabp3",
  "Ktr7",

  # type_3
  "Fos",
  "Egr1",
  "Atf3",

  # type_4
  "S100g",

  # type_5
  "Cxcl10"
)

DotPlot(SO7,
features = markers.to.plot_MD,
group.by = "subclass_MD",
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()
## Warning: The following requested variables were not found: Ktr7

DimPlot(SO7, split.by = "treatment")

markers.to.plot_MD2 <- c(
  
  # type_1
  "Pappa2",
 
  # type_2
  "Egf",

  # type_3
  "Fos",

  # type_4
  "S100g",

  # type_5
  "Cxcl10"
)

DotPlot(SO7,
features = markers.to.plot_MD2,
group.by = "subclass_MD",
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

# Viewing Each Cluster with its DEGs

Idents(SO7) <- "subclass_MD"

df <- FindMarkers(
  object = SO7,
  ident.1 = "type_1",
  ident.2 = NULL,
  logfc.threshold = 0.25,
  min.pct = 0.25
)
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
  all_markers <- FindAllMarkers(
  SO7,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25)
## Calculating cluster type_2
## Calculating cluster type_1
## Calculating cluster type_5
## Calculating cluster type_3
## Calculating cluster type_4
  # Getting top markers for all 5 types
  type1_markers <- subset(all_markers, cluster == "type_1")
  type2_markers <- subset(all_markers, cluster == "type_2")
  type3_markers <- subset(all_markers, cluster == "type_3")
  type4_markers <- subset(all_markers, cluster == "type_4")
  type5_markers <- subset(all_markers, cluster == "type_5")
  
  
  
  top_t1 <- rownames(type1_markers)[order(type1_markers$avg_log2FC, decreasing = TRUE)[1:10]]
  FeaturePlot(SO7,"Aard")
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

  FeaturePlot(SO7,"Dctd")

  FeaturePlot(SO7,"Pappa2")

  top_t2 <- rownames(type2_markers)[order(type2_markers$avg_log2FC, decreasing = TRUE)[1:10]]
  FeaturePlot(SO7,"Foxq1")

  FeaturePlot(SO7,"Glod5")

  top_t3 <- rownames(type3_markers)[order(type3_markers$avg_log2FC, decreasing = TRUE)[1:10]]
  FeaturePlot(SO7,"Fosb")

  FeaturePlot(SO7,"Egr1")

  FeaturePlot(SO7,"Atf3")

  top_t4 <- rownames(type4_markers)[order(type4_markers$avg_log2FC, decreasing = TRUE)[1:10]]
  FeaturePlot(SO7,"S100g")

  top_t5 <- rownames(type5_markers)[order(type5_markers$avg_log2FC, decreasing = TRUE)[1:10]]
  FeaturePlot(SO7,"Cxcl10")

3 Violin plots to show further information

# Violin plots for top marker genes of each type cluster
# For type 1
VlnPlot(SO7, features = c("Aard", "Dctd", "Pappa2"))

# For type 2
VlnPlot(SO7, features = c("Foxq1", "Glod5"))

# For type 3
VlnPlot(SO7, features = c("Fosb", "Egr1", "Atf3"))

# For type 4
VlnPlot(SO7, features = c("S100g"))

# For type 5
VlnPlot(SO7, features = c("Cxcl10"))

# Difference between low_salt vs control

# Split your object by the "treatment" column
seurat_list <- SplitObject(SO7, split.by = "treatment")

# Extract the control and low_salt objects
control_obj <- seurat_list$control
low_salt_obj <- seurat_list$low_salt

table(Idents(control_obj))
## 
## type_2 type_1 type_5 type_3 type_4 
##   1180   3372     41    137    246
table(Idents(low_salt_obj))
## 
## type_2 type_1 type_5 type_3 type_4 
##    985   4887     84    380     92
type1_obj <- subset(SO7, idents = "type_1")
markers_type1 <- FindMarkers(
  type1_obj,
  ident.1 = "low_salt",
  ident.2 = "control",
  group.by = "treatment",
  logfc.threshold = 0.25,
  min.pct = 0.25
)
  
df<- markers_type1 %>% arrange(desc(avg_log2FC))

df
test <- head(rownames(df), n = 3)

VlnPlot(SO7, features = test, group.by = "treatment")

test2 <- tail(rownames(df), n = 3)

VlnPlot(SO7, features = test2, group.by = "treatment")

VlnPlot(SO7, features = c("Ckb", "Egfl6", "Ide"), group.by = "treatment")

4 Volcano Plot

df2 <- df %>% filter(p_val_adj < 0.05)

top5 <- rownames(df2)[order(df2$avg_log2FC, decreasing = TRUE)[1:10]]
bottom5 <- rownames(df2)[order(df2$avg_log2FC, decreasing = FALSE)[1:10]]

# Combine them into a single vector
selected_genes <- c(top5, bottom5)

EnhancedVolcano(df2,
                lab = rownames(df2),
                selectLab = selected_genes,
                title = (""),
                subtitle = NULL,
                caption = NULL,
                x = 'avg_log2FC', 
                legendLabels = NULL, 
                FCcutoff = 0.25, 
                y = 'p_val_adj', 
                labSize = 4, 
                legendIconSize = 4, 
                drawConnectors = T,
                max.overlap = 10,
                xlim = c(-2.5, 2.5),
                widthConnectors = 0.5) + 
  theme_classic(base_size = 16) +  # Increases overall font size
  theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
        axis.text = element_text(size = 14),  # Axis tick labels
        axis.title = element_text(size = 16, face = "bold"),  # Axis labels
        axis.line = element_line(size = 1.2),  # Increases axis border thickness
        legend.position = "none")
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest
## non-zero p-value...
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Downregulated Pathways of DEG’s

DEG_list <- df2

markers <- DEG_list %>% rownames_to_column(var="SYMBOL")

head(markers, n = 50)
ENTREZ_list <- bitr(
  geneID = rownames(DEG_list),
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = rownames(DEG_list), fromType = "SYMBOL", toType =
## "ENTREZID", : 7.95% of input gene IDs are fail to map...
markers <-  ENTREZ_list %>% inner_join(markers, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers <-  markers %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers <-  markers %>% dplyr::filter(avg_log2FC < 0) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks <- pos.markers$ENTREZID[abs(pos.markers$avg_log2FC) > 0]
#head(pos.ranks)

pos_go <- enrichGO(gene = pos.ranks,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:138] "667014" "667937" "665407" "100039571" "54156" "666793" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...34 enriched terms found
## 'data.frame':    34 obs. of  12 variables:
##  $ ID            : chr  "GO:0097250" "GO:0045333" "GO:0009060" "GO:0015980" ...
##  $ Description   : chr  "mitochondrial respirasome assembly" "cellular respiration" "aerobic respiration" "energy derivation by oxidation of organic compounds" ...
##  $ GeneRatio     : chr  "7/88" "9/88" "8/88" "10/88" ...
##  $ BgRatio       : chr  "114/28928" "271/28928" "206/28928" "380/28928" ...
##  $ RichFactor    : num  0.0614 0.0332 0.0388 0.0263 0.1064 ...
##  $ FoldEnrichment: num  20.19 10.92 12.77 8.65 34.97 ...
##  $ zScore        : num  11.34 9.06 9.36 8.29 12.87 ...
##  $ pvalue        : num  5.98e-08 1.46e-07 2.28e-07 2.47e-07 3.22e-07 ...
##  $ p.adjust      : num  8.43e-05 8.43e-05 8.43e-05 8.43e-05 8.43e-05 ...
##  $ qvalue        : num  7.18e-05 7.18e-05 7.18e-05 7.18e-05 7.18e-05 ...
##  $ geneID        : chr  "Ndufa3/Fmc1/Pet100/Ndufb4c/Ndufa1/Ndufc1/Ndufa5" "Ide/Ndufa3/Ndufa1/Cs/Cox6c/Ndufc1/Cox7c/Got2/Ndufa5" "Ide/Ndufa3/Ndufa1/Cs/Cox6c/Ndufc1/Cox7c/Ndufa5" "Ide/Ndufa3/Ugp2/Ndufa1/Cs/Cox6c/Ndufc1/Cox7c/Got2/Ndufa5" ...
##  $ Count         : int  7 9 8 10 5 5 5 6 7 5 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
dotplot(pos_go) +
    ggtitle("") +
    theme(plot.title = element_text(hjust = 0.5)) + 
    theme_classic() + 
    scale_x_reverse()  # Reverses the x-axis

5 Upregulated Pathway of DEG’s

DEG_list <- df2

markers <- DEG_list %>% rownames_to_column(var="SYMBOL")

head(markers, n = 50)
ENTREZ_list <- bitr(geneID = rownames(DEG_list),   #input gene id
                    fromType = "SYMBOL",           #input id type
                    toType = "ENTREZID",           #output id type
                    OrgDb = "org.Mm.eg.db"         #annotation Db
                    )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = rownames(DEG_list), fromType = "SYMBOL", toType =
## "ENTREZID", : 7.95% of input gene IDs are fail to map...
markers <-  ENTREZ_list %>% inner_join(markers, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers <-  markers %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers <-  markers %>% dplyr::filter(avg_log2FC > 0) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks <- pos.markers$ENTREZID[abs(pos.markers$avg_log2FC) > 0]
#head(pos.ranks)

pos_go <- enrichGO(gene = pos.ranks,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:267] "16010" "239435" "23850" "70337" "19225" "66815" "11423" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...96 enriched terms found
## 'data.frame':    96 obs. of  12 variables:
##  $ ID            : chr  "GO:0006457" "GO:0061077" "GO:0042026" "GO:0051084" ...
##  $ Description   : chr  "protein folding" "chaperone-mediated protein folding" "protein refolding" "'de novo' post-translational protein folding" ...
##  $ GeneRatio     : chr  "15/232" "10/232" "6/232" "7/232" ...
##  $ BgRatio       : chr  "185/28928" "69/28928" "24/28928" "44/28928" ...
##  $ RichFactor    : num  0.0811 0.1449 0.25 0.1591 0.1522 ...
##  $ FoldEnrichment: num  10.1 18.1 31.2 19.8 19 ...
##  $ zScore        : num  11.2 12.8 13.3 11.2 11 ...
##  $ pvalue        : num  3.02e-11 2.04e-10 2.97e-08 5.80e-08 8.00e-08 ...
##  $ p.adjust      : num  9.31e-08 3.14e-07 3.06e-05 4.48e-05 4.93e-05 ...
##  $ qvalue        : num  7.54e-08 2.55e-07 2.48e-05 3.63e-05 4.00e-05 ...
##  $ geneID        : chr  "Hspb1/Hspa8/Hspa1a/Hspa1b/Clu/Hspa5/Ppil1/Hsp90aa1/Fkbp5/Sdf2l1/Hsp90b1/Ahsa1/Selenof/Ppid/Cct3" "Hspb1/Hspa8/Hspa1a/Hspa1b/Clu/Hspa5/Fkbp5/Sdf2l1/Ppid/Cct3" "Hspb1/Hspa8/Hspa1a/Hspa1b/Hspa5/Hsp90aa1" "Hspa8/Hspa1a/Hspa1b/Hspa5/Sdf2l1/Selenof/Cct3" ...
##  $ Count         : int  15 10 6 7 7 17 10 9 6 6 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
dotplot(pos_go) +
    ggtitle("") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 lines
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

6 Analysis Mine vs Article

# Combine all genes into a single vector
genes_to_plot <- c("Fabp3", "Egf", "Ccn1", "Foxq1", "Cxcl12",
                   "Vash2", "Pamr1", "Vegfa", "Ccn3",
                   "Bmp3", "Fgf9", "Spp1", "Wnt10a", "Sfrp1", "Tcf4")

# Generate FeaturePlots for all genes
FeaturePlot(SO7, features = genes_to_plot, 
            reduction = "umap", 
            pt.size = 0.5, 
            ncol = 4)

# Define gene groups
group1 <- c("Aard", "Dctd", "Pappa2")           # Type 1 
group2 <- c("Foxq1", "Glod5")                   # Type 2
group3 <- c("Fosb", "Egr1", "Atf3")             # Type 3
group4 <- c("S100g")                            # Type 4
group5 <- c("Cxcl10")                           # Type 5

# Create Feature plots for each group
plot1 <- FeaturePlot(SO7, features = group1, pt.size = 0) + ggtitle("Type 1 Markers")
plot2 <- FeaturePlot(SO7, features = group2, pt.size = 0) + ggtitle("Type 2 Markers")
plot3 <- FeaturePlot(SO7, features = group3, pt.size = 0) + ggtitle("Type 3 Markers")
plot4 <- FeaturePlot(SO7, features = group4, pt.size = 0) + ggtitle("Type 4 Marker")
plot5 <- FeaturePlot(SO7, features = group5, pt.size = 0) + ggtitle("Type 5 Marker")

# Combine all plots into a single layout
combined_plot <- (plot1 | plot2) / (plot3 | plot4 | plot5)

# Display the combined plot
print(combined_plot)

# Create Feature plots for each group
plot1v <- VlnPlot(SO7, features = group1, pt.size = 0) + ggtitle("Type 1 Markers")
plot2v <- VlnPlot(SO7, features = group2, pt.size = 0) + ggtitle("Type 2 Markers")
plot3v <- VlnPlot(SO7, features = group3, pt.size = 0) + ggtitle("Type 3 Markers")
plot4v <- VlnPlot(SO7, features = group4, pt.size = 0) + ggtitle("Type 4 Marker")
plot5v <- VlnPlot(SO7, features = group5, pt.size = 0) + ggtitle("Type 5 Marker")

# Combine all plots into a single layout
combined_plotv <- (plot1v | plot2v)
combined_plotv2 <- (plot3v | plot4v | plot5v)

# Display the combined plot
print(combined_plotv)

print(combined_plotv2)

print(plot1v)

print(plot5)

7 Pathway Analysis of Each Cluster

# Load required libraries
library(clusterProfiler)
library(org.Mm.eg.db)
library(dplyr)
library(tibble)
library(ggplot2)
library(stringr)

# Loop through types 1 to 5
for (i in 1:5) {
  cat("\n\n## Type", i, "Pathways\n\n")
  
  # Dynamically get the marker object
  marker_obj_name <- paste0("type", i, "_markers")
  DEG_list <- get(marker_obj_name)
  
  # Prepare marker table
  markers <- DEG_list %>%
    rownames_to_column(var = "SYMBOL")
  
  # Convert SYMBOL to ENTREZID
  ENTREZ_list <- bitr(
    geneID = markers$SYMBOL,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = org.Mm.eg.db
  )
  
  # Merge and filter
  markers <- ENTREZ_list %>%
    inner_join(markers, by = "SYMBOL") %>%
    filter(p_val_adj < 0.05)
  
  # Select upregulated genes
  pos.markers <- markers %>%
    filter(avg_log2FC > 0) %>%
    arrange(desc(avg_log2FC))
  
  pos.ranks <- pos.markers$ENTREZID[abs(pos.markers$avg_log2FC) > 0]
  
  # Run enrichment if there are genes
  if (length(pos.ranks) > 0) {
    pos_go <- enrichGO(
      gene = pos.ranks,
      OrgDb = org.Mm.eg.db,
      ont = "BP",
      readable = TRUE
    )
    
    # Plot
    print(
      dotplot(pos_go) +
        ggtitle(paste("Type", i, "Upregulated Pathways")) +
        theme_classic() +
        theme(
          plot.title = element_text(hjust = 0.5),
          legend.position = "left",
          axis.text.y = element_text(size = 10)
        ) +
        scale_y_discrete(labels = function(x) str_wrap(x, width = 40))
    )
  } else {
    cat("No significant upregulated genes for Type", i, "\n\n")
  }
}
## 
## 
## ## Type 1 Pathways
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = markers$SYMBOL, fromType = "SYMBOL", toType =
## "ENTREZID", : 2.4% of input gene IDs are fail to map...
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

## 
## 
## ## Type 2 Pathways
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = markers$SYMBOL, fromType = "SYMBOL", toType =
## "ENTREZID", : 6.35% of input gene IDs are fail to map...
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

## 
## 
## ## Type 3 Pathways
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = markers$SYMBOL, fromType = "SYMBOL", toType =
## "ENTREZID", : 13.79% of input gene IDs are fail to map...
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

## 
## 
## ## Type 4 Pathways
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = markers$SYMBOL, fromType = "SYMBOL", toType =
## "ENTREZID", : 40.94% of input gene IDs are fail to map...
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

## 
## 
## ## Type 5 Pathways
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = markers$SYMBOL, fromType = "SYMBOL", toType =
## "ENTREZID", : 27.62% of input gene IDs are fail to map...
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.